********************************************************************************
********    		  1.1. Baseline controls (1984 - 1994) 		    ************
********************************************************************************
/*
Employment shares of broad industry sectors for 1994, based on the employees of
establishments sampled in the SIAB firm sample over the ten preceding years.

This is used for the replication of DFSW 2019.
*/

* Load the SIAB-BHP establishment file that the baseline controls are built from
use ${orig}/SIAB_7517_v1_bhp_basis_v1, clear

* Recode the extended missing codes .n and .z to system missing in every variable
foreach v of varlist _all {
	replace `v' = . if inlist(`v', .n, .z)
}

* Harmonise names; employment is measured by az_vz
rename jahr year
rename ao_kreis region
generate emp = az_vz

* Discard observations without region, year or industry code (w08_3_gen)
drop if region == . | year == . | w08_3_gen == .

* Restrict to 1984-1994 and keep, per establishment (betnr), only the
* observation from its latest year inside that window
keep if inrange(year, 1984, 1994)
bysort betnr (year): keep if _n == _N




*Broad industry groups
* Each variable holds emp for establishments whose w08_3_gen code falls in the
* group and is missing otherwise, so a later (sum) yields group employment.

* 0. manufacturing
* Upper bound is 339 so that this aggregate equals the union of groups 2-5
* below. The earlier bound (< 360) also captured codes 350-359, which belong
* to group 6 (construction + provisioning), not to manufacturing.
gen employment_manufacturing = emp if inrange(w08_3_gen,100,339)

*1 Agriculture / Mining
gen employment_agriculture = emp if w08_3_gen <=99

*2 Food products
gen employment_food_products = emp if inrange(w08_3_gen,100,129)

*3 consumer goods + printing and copying
gen employment_consumer_goods = emp if inrange(w08_3_gen,130,159) | inrange(w08_3_gen,180,189) | inrange(w08_3_gen,310,329)

*4 industrial goods
gen employment_industrial_goods = emp if inrange(w08_3_gen,160,179) | inrange(w08_3_gen,190,259)

*5 capital goods + repair and installation of machines
gen employment_capital_goods = emp if inrange(w08_3_gen,260,309) | inrange(w08_3_gen,330,339)

*6 construction+provisioning
gen employment_construction = emp if inrange(w08_3_gen,350,439)

*7 personal services (trade+gastronomy+publishing+broadcasting+veterinarians+travel agencies+arts+other+households)
gen employment_maintenance = emp if inrange(w08_3_gen,450,479) | inrange(w08_3_gen,550,609) | w08_3_gen==750 | inrange(w08_3_gen,790,799) | inrange(w08_3_gen,900,939)  | inrange(w08_3_gen,960,989)

*8 business related services (transport+communication+finance+real estate+consulting+renting/leasing+security+cleaning+other+repair)
gen employment_services = emp if inrange(w08_3_gen,490,539) | inrange(w08_3_gen,610,749) | inrange(w08_3_gen,770,789) | inrange(w08_3_gen,800,829) | inrange(w08_3_gen,950,952)

*9 public sector + political parties and other organizations
gen employment_public = emp if inrange(w08_3_gen,840,889) | inrange(w08_3_gen,940,949) | w08_3_gen==990

* Sum total and sector-specific employment within each region
collapse (sum) emp ///
		employment_manufacturing employment_agriculture ///
		employment_food_products employment_consumer_goods ///
		employment_industrial_goods employment_capital_goods ///
		employment_construction employment_maintenance ///
		employment_services employment_public ///
		, by(region) //state

drop if region == .

* Variable labels share a common suffix naming the data source and period
local src "from SIAB-BHP 1984-1994"
label variable emp "FT employment `src'"
label variable employment_manufacturing "FT employment manufacturing `src'"
label variable employment_agriculture "FT employment agriculture `src'"
label variable employment_food_products "FT employment food prod. `src'"
label variable employment_consumer_goods "FT employment consumer prod. `src'"
label variable employment_industrial_goods "FT employment industrial goods `src'"
label variable employment_capital_goods "FT employment capital goods `src'"
label variable employment_construction "FT employment construction `src'"
label variable employment_maintenance "FT employment maintenance `src'"
label variable employment_services "FT employment services `src'"
label variable employment_public "FT employment public sector `src'"

* Prefix every variable except the region identifier with base_
ds region, not
foreach v in `r(varlist)' {
	rename `v' base_`v'
}

* The baseline values are stored under year 1994
generate year = 1994

save ${data}/baseline_controls.dta, replace




********************************************************************************
****  1.2  Calculate Regional Employment share (1984 - 1994)              ******
********************************************************************************

/*
Local employment composition based on the firms sampled in the SIAB firm sample
over the ten preceding years.
This is done for different industry classifications and levels of aggregation to
fit the different types of data (robots, ICT, trade).
Note that employment counts by sector / region are rescaled by the precise number
of total employees per region in 1994, taken from the replication data of Dauth,
Findeisen and Suedekum (2017). This allows us to get the absolute size of each
regional labor market right. The rescaling is necessary because we have more data
for West Germany (1984-1994) than for East Germany (1990-1994): since we aggregate
information from all years to obtain a sector distribution that is as accurate as
possible, the labor force in the East would otherwise be underestimated.
*/


********************************************************************************
***           Aggregate to Kreis, NACErev2 3-digits (ROBOTS)           *********
********************************************************************************
* Load the SIAB-BHP establishment file for the region x industry employment counts
use ${orig}/SIAB_7517_v1_bhp_basis_v1, clear

* Recode the extended missing codes .n and .z to system missing in every variable
foreach v of varlist _all {
	replace `v' = . if inlist(`v', .n, .z)
}

rename jahr year
rename ao_kreis region
generate az_emp = az_vz            // alternative left disabled by the author: + 0.5 * az_tz
generate n_establishments = 1      // one per observation, so its sum counts establishments
generate nace2_3d = w08_3_gen

* Discard observations without region, year or industry code
drop if region == . | year == . | w08_3_gen == .

* Restrict to 1984-1994 and keep each establishment's latest observation in that window
keep if inrange(year, 1984, 1994)
bysort betnr (year): keep if _n == _N

* Employment and establishment counts per region and industry code
collapse (sum) employment_r_s_y = az_emp n_establishments, ///
	by(nace2_3d w08_3_gen region)
sort region nace2_3d

* Rescale so that each region's total matches emp_DFS after merging
* employment_DFS.dta; merge keeps unmatched observations from both sides
merge m:1 region using ${orig}/employment_DFS.dta, nogenerate
bysort region: egen employment_r = total(employment_r_s_y)
replace employment_r_s_y = employment_r_s_y / employment_r * emp_DFS

* Copy the same values to every year 1994-2018 and reshape to long format
* so the file can be merged onto a region x industry x year panel
forvalues yr = 1994/2018 {
	display "`yr'"
	generate employment_r_s_y`yr' = employment_r_s_y
}
drop employment_r_s_y
reshape long employment_r_s_y, i(region w08_3_gen) j(year)

save $data/bhp_employment_r_s_y_NACErev2_3d_1994.dta, replace

********************************************************************************
***           Aggregate to Kreis, NACErev2 2-digit (EUKLEMS, ROBOTS)   *********
********************************************************************************
* Two-digit aggregation, for robot industries available only at two digits.
* Operates on the three-digit long-format data that is still in memory.
generate nace2_2d = floor(w08_3_gen/10)   // strip the last digit of the 3-digit code

collapse (sum) employment_r_s_y n_establishments, by(year nace2_2d region)
sort region nace2_2d year

* Remove cells lacking a region, year or industry code
drop if region == . | year == . | nace2_2d == .

save $data/bhp_employment_r_s_y_NACErev2_2d_1994.dta, replace


********************************************************************************
***           Aggregate to Kreis, NACErev1  (for TRADE data)           *********
********************************************************************************
* Build region x industry employment on the NACE Rev. 1 3-digit code
* (w93_3_gen), rescaled to the regional totals in employment_DFS.dta.
use ${orig}/SIAB_7517_v1_bhp_basis_v1, clear

*all missings to .
foreach var of varlist _all {
	replace `var' = . if `var'==.n
	replace `var' = . if `var'==.z
}

rename ao_kreis region
rename jahr year
gen az_emp = az_vz  //+ 0.5 * az_tz
gen n_establishments = 1 // n reflects number of underlying establishments
gen nace1_3d = w93_3_gen

* Filter on the classification used in this section. The earlier condition
* tested w08_3_gen, which let observations with a missing w93_3_gen enter the
* regional totals as a missing-industry cell and discarded observations that
* have a valid w93_3_gen but no w08_3_gen.
* NOTE(review): if an identical establishment sample across the NACE Rev. 1
* and Rev. 2 files is required, additionally drop if w08_3_gen == .
drop if region == . | year == . | w93_3_gen == .

* Keep 1984-1994 and, per establishment, the observation of its latest year
drop if year > 1994 | year < 1984
bysort betnr (year) : drop if  _n != _N

collapse (sum) employment_r_s_y = az_emp (sum) n_establishments, ///
	by(nace1_3d w93_3_gen region)
sort region nace1_3d


* resize total employment by DFS 2017 replication data
merge m:1 region using ${orig}/employment_DFS.dta
drop _merge
bysort region: egen employment_r = total(employment_r_s_y)
replace employment_r_s_y = employment_r_s_y / employment_r * emp_DFS

* Repeat employment values for all years 1994-2018 so it can be easily merged on panel
forvalues year=1994/2018{
di "`year'"
gen employment_r_s_y`year' = employment_r_s_y
}
drop employment_r_s_y
reshape long employment_r_s_y, i(region w93_3_gen) j(year)
gen   nace = w93_3_gen
save $data/bhp_employment_r_s_y_NACErev1_3d_1994.dta, replace


*create 2-digit industries
* Operates on the NACE Rev. 1 three-digit long-format data still in memory.
gen nace1_2d = floor(w93_3_gen/10)

collapse (sum) employment_r_s_y = employment_r_s_y (sum) n_establishments, ///
	by(year nace1_2d region)
sort region nace1_2d year
* Drop incomplete observations, as the NACE Rev. 2 two-digit file does. Without
* this, cells with a missing industry code (e.g. regions present only in
* employment_DFS.dta after the merge) are saved as rows.
drop if region == . | year == . | nace1_2d == .
save $data/bhp_employment_r_s_y_NACErev1_2d_1994.dta, replace


********************************************************************************
***           Aggregate to Industry, NACErev1  (for TRADE data)        *********
********************************************************************************
* Industry weights: total employment per NACE Rev. 1 3-digit industry (nace),
* summed over all regions, taken from the 1994 rows of the region-level file
use $data/bhp_employment_r_s_y_NACErev1_3d_1994.dta, clear
drop if year != 1994

* One row per industry with its summed employment
collapse (sum) employees_nace = employment_r_s_y, by(nace)
save $data/industry_weights_nace_3d.dta, replace






